#!/usr/bin/python

from pyspark import SparkConf,SparkContext
# Uploaded to git
# Word-count job: reads a text file from HDFS, counts how many times each
# whitespace-separated word occurs, and writes the (word, count) pairs back
# to HDFS as text.
appName = "pyWordCount"
master = "spark://hadoop-server-00:7077"
conf = SparkConf().setAppName(appName).setMaster(master)
sc = SparkContext(conf=conf)

try:
    distFile = sc.textFile("hdfs://hadoop-server-00:9000/wordcount.txt")
    # split() with no argument splits on any run of whitespace and drops
    # empty tokens, so blank lines and repeated/leading/trailing spaces do
    # not produce a bogus "" word in the counts (split(" ") would).
    flatMapRDD = distFile.flatMap(lambda a: a.split()).map(lambda x: (x, 1))
    # Sum the per-occurrence 1s for each distinct word.
    reduceRDD = flatMapRDD.reduceByKey(lambda x, y: x + y)
    reduceRDD.saveAsTextFile("hdfs://hadoop-server-00:9000/out003")
finally:
    # Always shut the context down, even when the job fails, so the
    # application does not linger on the cluster.
    sc.stop()

